{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Speaker verification\n",
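    "\n",
    "Compute a speaker embedding for two excerpts of an audio file, then compare them: the smaller the cosine distance between the two embeddings, the more similar the two voices.\n",
    "\n",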
    "```python\n",
    "import torch\n",
    "from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding\n",
    "model = PretrainedSpeakerEmbedding(\n",
    "    \"speechbrain/spkrec-ecapa-voxceleb\",\n",
    "    device=torch.device(\"cuda\"))\n",
    "\n",
    "from pyannote.audio import Audio\n",
    "from pyannote.core import Segment\n",
    "audio = Audio(sample_rate=16000, mono=True)\n",
    "\n",
    "# extract embedding for a speaker speaking between t=3s and t=6s\n",
    "speaker1 = Segment(3., 6.)\n",
    "waveform1, sample_rate = audio.crop(\"audio.wav\", speaker1)\n",
    "embedding1 = model(waveform1[None])\n",
    "\n",
    "# extract embedding for a speaker speaking between t=7s and t=12s\n",
    "speaker2 = Segment(7., 12.)\n",
    "waveform2, sample_rate = audio.crop(\"audio.wav\", speaker2)\n",
    "embedding2 = model(waveform2[None])\n",
    "\n",
    "# compare embeddings using \"cosine\" distance (0 means identical direction; larger means more dissimilar)\n",
    "from scipy.spatial.distance import cdist\n",
    "distance = cdist(embedding1, embedding2, metric=\"cosine\")\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "41379f2c2a4eb17f5ac9a1f5014f4b793a0ead0b6469d8877f81a91eb030f53e"
  },
  "kernelspec": {
   "display_name": "Python 3.8.2 64-bit ('pyannote': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
